{-
  Copyright (c) Meta Platforms, Inc. and affiliates.
  All rights reserved.

  This source code is licensed under the BSD-style license found in the
  LICENSE file in the root directory of this source tree.
-}

{-# LANGUAGE CPP, ApplicativeDo #-}

-- | An 'Indexer' that uses an external process, as opposed to a
-- library. The indexer might involve running an executable to
-- generate JSON files and then writing them to the repo using the
-- 'glean' CLI tool, or it might involve running a server.
--
module Glean.Indexer.External
  ( externalIndexer
  , Ext(..)
  , Flavour(..)
  , sendJsonBatches
  ) where

import Control.Exception
import Control.Monad
import qualified Data.Aeson as Aeson
import qualified Data.Aeson.Types as Aeson
import Data.Char(isAlphaNum)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List.Extra as L
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Options.Applicative as O
import System.Directory
import System.FilePath
import System.IO.Temp
import System.Process

import Control.Concurrent.Stream (stream)
import Facebook.Fb303
import Facebook.Service
#ifdef FBTHRIFT
import qualified Thrift.Server.CppServer as ThriftServer
#else
import qualified Thrift.Server.HTTP as ThriftServer
#endif

import qualified Glean
import qualified Glean.LocalOrRemote as LocalOrRemote
import Glean.LocalOrRemote (BackendKind(..), LocalOrRemote(..))
import Glean.Remote (ThriftBackend(..))
import Glean.Derive
import qualified Glean.Handler as GleanHandler
import Glean.Indexer
import Glean.Write
import Glean.Util.Service

listDirectoryRecursive :: FilePath -> IO [FilePath]
listDirectoryRecursive dir = do
  contents <- listDirectory dir
  paths <- forM contents $ \name -> do
      let path = dir </> name
      isDirectory <- doesDirectoryExist path
      if isDirectory
          then listDirectoryRecursive path
          else return [path]
  return (concat paths)

externalIndexer :: Indexer Ext
externalIndexer = Indexer
  { indexerShortName = "external"
  , indexerDescription = "A generic indexer that runs an external binary"
  , indexerOptParser = extOptions
  , indexerRun = execExternal
  }

data Flavour = Json | Server

data Ext = Ext
  { extRunScript :: FilePath
  , extArgs :: [String]
  , extFlavour :: Flavour
  , extDerivePredicates :: [Text]
  , extAllowNonZeroExit :: Bool
  }

extOptions :: O.Parser Ext
extOptions = do
  extRunScript <- O.strOption $
    O.long "binary" <> O.metavar "PATH" <>
    O.help "script to run to produce facts, arguments supplied by --arg/--args"
  extArgs <- fmap concat $ O.many $
    fmap (:[]) (O.strOption $ O.long "arg" <> O.metavar "ARG")
    O.<|>
    fmap words (O.strOption $ O.long "args" <> O.metavar "ARGS")
  extFlavour <-
    O.flag' Json (
      O.long "json" <>
      O.help "the binary should put JSON files in ${JSON_BATCH_DIR}") O.<|>
    O.flag' Server (
      O.long "server" <>
      O.help ("the binary should connect to a Glean server at " <>
        "${GLEAN_SERVER} to write facts"))
  extDerivePredicates <-
    fmap (maybe [] (Text.splitOn "," . Text.pack)) $
    O.optional $
    O.strOption $
    O.long "derive" <> O.metavar "PREDICATE,PREDICATE,..." <>
    O.help "predicates to derive after writing facts (ordered)"
  extAllowNonZeroExit <-
    O.switch $ O.long "allow-non-zero-exit" <>
    O.help "allow the binary to exit with a non-zero exit code"
  return Ext{..}

-- | Finish decoding parameters from @glean_test@ by
-- performing substitutions on @TEST_*@ variables, and return the
-- 'RunIndexer'.
execExternal :: Ext -> RunIndexer
execExternal Ext{..} env repo IndexerParams{..} = do index; derive
  where
  derive = forM_ extDerivePredicates $ \pred ->
    derivePredicate env repo Nothing Nothing
      (parseRef pred) Nothing

  repoName = Text.unpack (Glean.repo_name repo)
  repoHash = Text.unpack (Glean.repo_hash repo)

  vars = HashMap.fromList
    [ ("TEST_REPO", Glean.showRepo repo)
    , ("TEST_REPO_NAME", repoName)
    , ("TEST_REPO_HASH", repoHash)
    , ("TEST_ROOT", indexerRoot)
    ]

  -- We could make this configurable, but there's not a lot to be
  -- gained and it would have to be plumbed through all the separate
  -- language indexers that build on External.
  maxConcurrency = 20 :: Int

  run = if extAllowNonZeroExit then void . system else callCommand
  index = case extFlavour of
    Json -> do
      jsonBatchDir <- createTempDirectory indexerOutput "glean-json"
      let jsonVars = HashMap.insert "JSON_BATCH_DIR" jsonBatchDir vars
      let cmdLine = unwords (extRunScript : map (quoteArg . subst jsonVars) extArgs)
      run cmdLine
      files <- listDirectoryRecursive jsonBatchDir
      stream maxConcurrency (forM_ files) $ \file -> do
        (batches, schema_id) <- fileToBatches (jsonBatchDir </> file)
        let opts = schemaIdToOpts schema_id

        void $ LocalOrRemote.sendJsonBatch env repo batches opts

    Server -> do
      let
        go service = do
          let serverVars = HashMap.insert "GLEAN_SERVER" service vars
          let cmdLine = unwords (extRunScript : map (quoteArg . subst serverVars) extArgs)
          run cmdLine
      case backendKind env of
        BackendEnv env -> do
          fb303 <- newFb303 "gleandriver"
          let state = GleanHandler.State fb303 env
          withBackgroundFacebookService
            (GleanHandler.fb303State state)
            (GleanHandler.handler state)
            ThriftServer.defaultOptions
            $ \server -> go ("localhost:" <>
                show (ThriftServer.serverPort server))
        BackendThrift thrift -> do
          let clientConfig = thriftBackendClientConfig thrift
          go (serviceToString (Glean.clientConfig_serv clientConfig))

  subst vars ('$':'{':s)
    | (var,'}':rest) <- break (=='}') s
    , all (\c -> isAlphaNum c || c == '_') var =
        HashMap.lookupDefault ("${" <> var <> "}") var vars ++ subst vars rest
  subst vars (c:s) = c : subst vars s
  subst _ "" = ""

  -- Quotes a value to allow it to be safely exposed to the shell
  -- The method used is to replace ' with '"'"' and wrap the value inside
  -- single quotes. This works for POSIX shells.
  quoteArg t =  q <> L.intercalate "'\"'\"'" (L.splitOn q t) <> q
    where
      q = "'"



sendJsonBatches
  :: LocalOrRemote.LocalOrRemote b
  => b
  -> Glean.Repo
  -> String
  -> Aeson.Value
  -> IO ()
sendJsonBatches backend repo msg val = do
  (batches, schema_id) <- case Aeson.parse parseJsonFactBatches val of
    Aeson.Error s -> throwIO $ ErrorCall $ msg <> ": " <> s
    Aeson.Success x -> return x
  let opts = schemaIdToOpts schema_id
  void $ LocalOrRemote.sendJsonBatch backend repo batches opts
